1   package org.apache.lucene.analysis.ja.dict;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements.  See the NOTICE file distributed with
6    * this work for additional information regarding copyright ownership.
7    * The ASF licenses this file to You under the Apache License, Version 2.0
8    * (the "License"); you may not use this file except in compliance with
9    * the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  import java.io.BufferedInputStream;
21  import java.io.IOException;
22  import java.io.InputStream;
23  
24  import org.apache.lucene.codecs.CodecUtil;
25  import org.apache.lucene.store.DataInput;
26  import org.apache.lucene.store.InputStreamDataInput;
27  import org.apache.lucene.util.BitUtil;
28  import org.apache.lucene.util.IOUtils;
29  
30  /**
31   * n-gram connection cost data
32   */
33  public final class ConnectionCosts {
34    
35    public static final String FILENAME_SUFFIX = ".dat";
36    public static final String HEADER = "kuromoji_cc";
37    public static final int VERSION = 1;
38    
39    private final short[][] costs; // array is backward IDs first since get is called using the same backward ID consecutively. maybe doesn't matter.
40    
41    private ConnectionCosts() throws IOException {
42      InputStream is = null;
43      short[][] costs = null;
44      boolean success = false;
45      try {
46        is = BinaryDictionary.getClassResource(getClass(), FILENAME_SUFFIX);
47        is = new BufferedInputStream(is);
48        final DataInput in = new InputStreamDataInput(is);
49        CodecUtil.checkHeader(in, HEADER, VERSION, VERSION);
50        int forwardSize = in.readVInt();
51        int backwardSize = in.readVInt();
52        costs = new short[backwardSize][forwardSize];
53        int accum = 0;
54        for (int j = 0; j < costs.length; j++) {
55          final short[] a = costs[j];
56          for (int i = 0; i < a.length; i++) {
57            accum += in.readZInt();
58            a[i] = (short)accum;
59          }
60        }
61        success = true;
62      } finally {
63        if (success) {
64          IOUtils.close(is);
65        } else {
66          IOUtils.closeWhileHandlingException(is);
67        }
68      }
69      
70      this.costs = costs;
71    }
72    
73    public int get(int forwardId, int backwardId) {
74      return costs[backwardId][forwardId];
75    }
76    
77    public static ConnectionCosts getInstance() {
78      return SingletonHolder.INSTANCE;
79    }
80    
81    private static class SingletonHolder {
82      static final ConnectionCosts INSTANCE;
83      static {
84        try {
85          INSTANCE = new ConnectionCosts();
86        } catch (IOException ioe) {
87          throw new RuntimeException("Cannot load ConnectionCosts.", ioe);
88        }
89      }
90     }
91    
92  }